// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "WaapScanner.h"
#include "WaapScores.h"
#include "Waf2Engine.h"
#include "i_transaction.h"
#include <string>
#include "debug.h"
#include "reputation_features_events.h"
#include <boost/algorithm/string.hpp>

// Debug flags made available to the debug statements of this translation unit.
// NOTE(review): no statement visible in this file uses D_OA_SCHEMA_UPDATER - confirm it is still needed.
USE_DEBUG_FLAG(D_WAAP_SCANNER);
USE_DEBUG_FLAG(D_OA_SCHEMA_UPDATER);

// Id generated by the XML parser for an entity attribute.
// onKv() compares incoming values against this id; on a match (and unless an ignore override applies)
// the value is reported with a score of 10.0 and the "xml_entity" keyword.
const std::string Waap::Scanner::xmlEntityAttributeId = "08a80340-06d3-11ea-9f87-0242ac11000f";

// Computes the score of a scan result against the score pool named poolName.
//
// The keyword set is built from res.keyword_matches and res.found_patterns. When applyLearning is set, the set
// is additionally passed through the asset state's indicator filters (filtered keywords are collected in
// res.filtered_keywords). The surviving keywords are written back to res.keyword_matches (sorted), then
// space-stripped and de-duplicated into res.keywordsAfterFilter, which is what finally gets scored.
//
// Returns the value produced by getScoreFromPool() for the de-duplicated keywords.
double Waap::Scanner::getScoreData(Waf2ScanResult& res, const std::string &poolName, bool applyLearning)
{
    std::string source = m_transaction->getSourceIdentifier();

    // Gather the keywords to score out of the keyword matches and the found patterns
    Waap::Keywords::KeywordsSet keywordsSet;
    Waap::Keywords::computeKeywordsSet(keywordsSet, res.keyword_matches, res.found_patterns);

    if (applyLearning) {
        const auto &assetState = m_transaction->getAssetState();
        std::string filterKey = IndicatorsFiltersManager::generateKey(res.location, res.param_name, m_transaction);
        dbgTrace(D_WAAP_SCANNER)
            << "filter processing for parameter: "
            << filterKey
            << ", indicators count: "
            << keywordsSet.size();
        assetState->logIndicatorsInFilters(filterKey, keywordsSet, m_transaction);
        assetState->filterKeywords(filterKey, keywordsSet, res.filtered_keywords);

        // Verbose filter reporting is opt-in through the "filtersVerbose" WAAP parameter
        const auto &siteConfig = m_transaction->getSiteConfig();
        if (siteConfig != nullptr) {
            auto waapParams = siteConfig->get_WaapParametersPolicy();
            if (waapParams != nullptr && waapParams->getParamVal("filtersVerbose", "false") == "true") {
                assetState->filterVerbose(filterKey, res.filtered_keywords);
            }
        }
        assetState->filterKeywordsByParameters(res.param_name, keywordsSet);

        dbgTrace(D_WAAP_SCANNER)
            << "post filtering indicators count: "
            << keywordsSet.size();
    }

    // Keywords are dropped in production only; while scores are being built they must stay in place
    if (!m_transaction->get_ignoreScore()) {
        m_transaction->getAssetState()->removeKeywords(keywordsSet);
    }

    // wbxml may be either detected heuristically by the deep parser, or only declared by the content-type
    // header while the top-level body was not recognized as such ("broken" wbxml)
    DeepParser &deepParser = m_transaction->getDeepParser();
    const bool isBrokenWBXML =
        m_transaction->getContentType() == Waap::Util::CONTENT_TYPE_WBXML &&
        deepParser.depth() == 0 &&
        deepParser.m_key.first().size() == 4 &&
        deepParser.m_key.first() == "body" &&
        !deepParser.isWBXmlData();

    if (deepParser.isWBXmlData() || isBrokenWBXML) {
        dbgTrace(D_WAAP_SCANNER)
            << "Filtering out wbxml keywords. isWbXmlData: "
            << deepParser.isWBXmlData()
            << ", isBrokenWBXml:"
            << isBrokenWBXML;
        m_transaction->getAssetState()->removeWBXMLKeywords(keywordsSet, res.filtered_keywords);
    }

    // Replace the keyword matches with whatever survived the filtering, in sorted order
    res.keyword_matches.clear();
    for (const auto &survivor : keywordsSet) {
        res.keyword_matches.push_back(survivor);
    }
    std::sort(res.keyword_matches.begin(), res.keyword_matches.end());

    // Strip spaces from every keyword and keep both the list and a single space-separated string of them
    std::string keywords_string;
    std::vector<std::string> strippedKeywords;
    for (const auto &rawKeyword : keywordsSet) {
        std::string stripped = rawKeyword;
        stripSpaces(stripped);

        // A separator goes between the items, never in front of the first one
        if (!strippedKeywords.empty()) {
            keywords_string += " ";
        }
        keywords_string += stripped;
        strippedKeywords.push_back(stripped);
    }

    // Select the keywords to score:
    //  - a keyword occurring less than twice in the joined string is always kept
    //  - otherwise it is kept (once) only if it is an exact duplicate of another stripped keyword
    std::vector<std::string> newKeywords;
    for (const auto &rawKeyword : keywordsSet) {
        std::string stripped = rawKeyword;
        stripSpaces(stripped);

        if (countSubstrings(keywords_string, stripped) < 2) {
            newKeywords.push_back(stripped);
            continue;
        }

        if (std::count(strippedKeywords.begin(), strippedKeywords.end(), stripped) > 1 &&
            std::find(newKeywords.begin(), newKeywords.end(), stripped) == newKeywords.end()) {
            newKeywords.push_back(stripped);
        }
    }

    std::sort(newKeywords.begin(), newKeywords.end());
    res.keywordsAfterFilter.clear();
    for (const auto &selected : newKeywords) {
        res.keywordsAfterFilter.push_back(selected);
    }

    return getScoreFromPool(res, newKeywords, poolName);
}

// Scores the given keywords against the score pool named poolName.
//
// res.scoreArray, res.coefArray and res.keywordCombinations are reset and then refilled with the scores and
// coefficients of the individual keywords and of their combinations.
//
// Returns the logistic regression score when the pool statistics mark it as a linear model, otherwise the
// score calculated from the collected score array.
double Waap::Scanner::getScoreFromPool(
    Waf2ScanResult &res, const std::vector<std::string> &newKeywords, const std::string &poolName
)
{
    // Results of a previous calculation must not leak into this one
    res.scoreArray.clear();
    res.coefArray.clear();
    res.keywordCombinations.clear();

    const auto &assetState = m_transaction->getAssetState();
    KeywordsStats poolStats = assetState->scoreBuilder.getSnapshotStats(poolName);

    if (!newKeywords.empty()) {
        // Scores (and coefficients) of each keyword on its own
        Waap::Scores::calcIndividualKeywords(
            assetState->scoreBuilder,
            poolName,
            newKeywords,
            res.scoreArray,
            res.coefArray
        );
        // Scores of keyword combinations are appended to the same arrays,
        // and the combinations themselves are recorded in res.keywordCombinations
        Waap::Scores::calcCombinations(
            assetState->scoreBuilder,
            poolName,
            newKeywords,
            res.scoreArray,
            res.coefArray,
            res.keywordCombinations
        );
    }

    if (!poolStats.isLinModel) {
        // base_scores calculation
        return Waap::Scores::calcArrayScore(res.scoreArray);
    }

    return Waap::Scores::calcLogisticRegressionScore(
        res.coefArray, poolStats.linModelIntercept, poolStats.linModelNNZCoef
    );
}

// Tells whether a scan result comes from a csp-report JSON field that should be ignored
// (for the cases where such fields were not already filtered out by learning).
//
// Only results scoring below 8 that were found in a "body" parsed by the JSON parser are considered.
// The ignored keys are "csp-report.blocked-uri", "csp-report.script-sample" and - only when the unescaped
// line contains a CSP report policy - "csp-report.original-policy".
bool Waap::Scanner::isKeyCspReport(const std::string &key, Waf2ScanResult &res, DeepParser &dp)
{
    if (!(res.score < 8.0f && res.location == "body" && dp.getActualParser(0) == "jsonParser")) {
        return false;
    }

    bool isIgnoredField;
    if (key == "csp-report.blocked-uri" || key == "csp-report.script-sample") {
        isIgnoredField = true;
    } else if (key == "csp-report.original-policy") {
        isIgnoredField = Waap::Util::containsCspReportPolicy(res.unescaped_line);
    } else {
        isIgnoredField = false;
    }

    if (!isIgnoredField) {
        return false;
    }

    dbgTrace(D_WAAP_SCANNER) << "CSP report detected, ignoring.";
    return true;
}

// Scores a scan result that already holds keyword matches and decides whether it gets reported.
//
// Parameters:
//   res        - scan result to score; its location, param_name, scoreNoFilter and score are updated here
//   dp         - deep parser of the transaction (consulted by the csp-report check)
//   location   - location of the scanned data; also selects the score pool
//   param_name - name of the scanned parameter (kept in res for reporting)
//   key        - key of the scanned key/value pair (consulted by the csp-report check)
//
// Returns false (with res.score reset to 0 and m_bIgnoreOverride set) when the hit is ignored, either because
// of an ignore override or because it belongs to an ignorable csp-report field. Otherwise returns whatever
// the transaction's reportScanResult() returns for the scored result.
bool Waap::Scanner::suspiciousHit(Waf2ScanResult& res, DeepParser &dp,
        const std::string& location, const std::string& param_name, const std::string& key)
{
    dbgTrace(D_WAAP_SCANNER) << "suspiciousHit processing for parameter: " << param_name << " at " << location <<
        " num of keywords " << res.keyword_matches.size();

    res.location = location;
    res.param_name = param_name; // remember the param name (analyzer needs it for reporting)

    // call shouldIgnoreOverride prior to score calculation, so that matched override keywords will be filtered
    // when an ignore override action is detected
    bool ignoreOverride = m_transaction->shouldIgnoreOverride(res);

    // Select scores pool by location
    std::string poolName = Waap::Scores::getScorePoolNameByLocation(location);

    // Score a copy of the result without the learning filters, so that the result itself is left untouched
    // for the filtered calculation that follows
    Waf2ScanResult nonFilterRes = res;
    res.scoreNoFilter = getScoreData(nonFilterRes, poolName, false);

    double score = getScoreData(res, poolName);

    // call shouldIgnoreOverride post score calculation and filtering to evaluate ignore override effectiveness
    // (the returned value is intentionally not used, the decision was already taken above)
    res.score = score;
    m_transaction->shouldIgnoreOverride(res);

    dbgTrace(D_WAAP_SCANNER) << "score: " << score << " should ignore: " << ignoreOverride;
    // Add record about scores to the notes[] log (also reported in logs)
    if (score > 1.0f) {
        DetectionEvent(location, res.keyword_matches).notify();
        // "%.3f" of a very large double needs far more than 128 characters, so the write must be bounded:
        // snprintf never writes past sizeof(buf) and always NUL-terminates the buffer
        char buf[128];
        snprintf(buf, sizeof(buf), "%.3f", score);
        const std::string& res_location = m_transaction->getDeepParser().m_key.first();
        const std::string& res_param_name = m_transaction->getDeepParser().m_key.str();
        m_transaction->addNote(
            "sc:" + res_location + (res_param_name.empty() ? "" : "/" + res_param_name) + ":" + std::string(buf)
        );
    }

    // The csp-report check relies on res.score, which was set to the calculated score above
    if (isKeyCspReport(key, res, dp) || ignoreOverride) {
        dbgTrace(D_WAAP_SCANNER) << "Ignoring parameter key/value " << res.param_name <<
            " due to ignore action in override";
        res.score = 0;
        m_bIgnoreOverride = true;
        return false;
    }

    res.score = score;
    return m_transaction->reportScanResult(res);
}


// Processes a single key/value pair: handles a few special cookies and tokens, then scans the parameter
// name (when it qualifies) and the parameter value for suspicious content.
//
// Parameters:
//   k, k_len     - key buffer and its length
//   v, v_len     - value buffer and its length
//   flags        - not referenced by this function
//   parser_depth - not referenced by this function
//
// The scan result of the last scan performed here is left in m_lastScanResult.
// Always returns 0.
int Waap::Scanner::onKv(const char* k, size_t k_len, const char* v, size_t v_len, int flags, size_t parser_depth) {
    Waf2ScanResult& res = m_lastScanResult;
    DeepParser &dp = m_transaction->getDeepParser();
    std::string key = std::string(k, k_len);
    std::string value = std::string(v, v_len);

    res.clear();
    dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: k='" << key <<
        "' v='" << value << "'";
    // Classify the payload according to the first component of the deep parser's key
    bool isCookiePayload = dp.m_key.first().size() == 6 && dp.m_key.first() == "cookie";
    bool isUrlParamPayload = dp.m_key.first().size() == 9 && dp.m_key.first() == "url_param";
    // A "url" payload counts as split url only when the full key string is not empty
    bool isSplitUrl = dp.m_key.first().size() == 3 &&
        dp.m_key.first() == "url" &&
        dp.m_key.str() != "";
    bool isHeaderPayload = dp.m_key.first().size() == 6 && dp.m_key.first() == "header";
    bool isRefererParamPayload =
        (dp.m_key.first().size() == 13 && dp.m_key.first() == "referer_param");
    bool isBodyPayload = dp.m_key.first().size() == 4 && dp.m_key.first() == "body";
    dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: depth=" <<
        dp.depth() << "; first='" << dp.m_key.first().c_str() << "'; key='" <<
        dp.m_key.str().c_str() << "'";

    // Collect URLs from values for openRedirect feature.
    m_transaction->getOpenRedirectState().collect(v, v_len, m_transaction->getHost());

    // Full key string, used below to recognize special cookies and tokens by name.
    const std::string& fullKeyStr = dp.m_key.str();
    dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: fullKeyStr: '" << fullKeyStr << "'";
    // Remember the value of the anti-bot cookie (exact name match)
    if(isCookiePayload && fullKeyStr == "__fn1522082288") {
        m_antibotCookie = value;
        dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: found Antibot Cookie: '" << m_antibotCookie << "'";
    }

    // Do not scan our own anti-bot cookie (match by name), it often false alarms.
    // Note that the match is by substring: any cookie whose key contains one of these names is skipped.
    if (isCookiePayload &&
        (fullKeyStr.find("fnserr") != std::string::npos ||
            fullKeyStr.find("__fn1522082288") != std::string::npos ||
            fullKeyStr.find("_fn_nsess") != std::string::npos)) {
        dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: skip scanning our own anti-bot cookie, by name";
        return 0;
    }

    // Do not scan google analytics cookie
    // (also matched by substring, so every cookie key containing "_ga", "_gid" or "_gat" is skipped)
    if (isCookiePayload &&
        (fullKeyStr.find("_ga") != std::string::npos ||
        fullKeyStr.find("_gid") != std::string::npos ||
        fullKeyStr.find("_gat") != std::string::npos)) {
        dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: skip scanning google analytics cookie";
        return 0;
    }
    // Record the csrf token, separately for each place it can arrive from (cookie, header, body).
    // The value keeps being processed (and scanned) afterwards.
    if (isCookiePayload && fullKeyStr == "x-chkp-csrf-token") {
        m_transaction->getCsrfState().set_CsrfToken(v, v_len);
    }
    if (isHeaderPayload && fullKeyStr == "x-chkp-csrf-token") {
        m_transaction->getCsrfState().set_CsrfHeaderToken(v, v_len);
    }
    if (isBodyPayload && fullKeyStr == "x-chkp-csrf-token") {
        m_transaction->getCsrfState().set_CsrfFormToken(v, v_len);
    }

    // Top-level cookie value wrapped in a matching pair of single or double quotes: scan it without the quotes
    if (dp.depth() == 0 &&
        isCookiePayload &&
        (v_len >= 2) &&
        ((v[0] == '"' && v[v_len - 1] == '"') || (v[0] == '\'' && v[v_len - 1] == '\''))
        ) {
        dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: removing quotes around cookie value: '" <<
            value << "'";
        // remove the quotes around the value
        // (v and v_len are adjusted as well, since they are used again further down)
        v++;
        v_len -= 2;
        value = std::string(v, v_len);
    }
    res.location = dp.m_key.first();
    res.param_name = dp.m_key.str();
    res.unescaped_line = unescape(value);

    m_transaction->getAssetState()->logParamHit(res, m_transaction);

    // Parameter types known to the filters manager for this parameter
    std::set<std::string> paramTypes = m_transaction->getAssetState()->m_filtersMngr->getParameterTypes(
        IndicatorsFiltersManager::generateKey(res.location, res.param_name, m_transaction));

    // A parameter whose only known type is "local_file_path" must not carry an http(s) URL:
    // such a value is reported right away with a score of 10.0 (unless an ignore override applies)
    if (paramTypes.size() == 1 && paramTypes.find("local_file_path") != paramTypes.end())
    {
        dbgTrace(D_WAAP_SCANNER) << "found parameter as local path, val : " << value;
        if ((value.find("http://") == 0 || value.find("https://") == 0) && !m_transaction->shouldIgnoreOverride(res))
        {
            res.score = 10.0;
            res.unescaped_line = value;
            res.keyword_matches.push_back("url_instead_of_file");
            m_transaction->addNote("sv: found url in " + res.location + "#" + res.param_name);
            m_transaction->reportScanResult(res);
            return 0;
        }
    }
    // Special value only matched when XML <!ENTITY> attribute is found.
    // (the length check is a cheap pre-filter: the compared id is 36 characters long)
    if (v_len == 36) {
        if (value == Waap::Scanner::xmlEntityAttributeId && !m_transaction->shouldIgnoreOverride(res)) {
            // Always return max score when <!ENTITY tag is encountered during XML parsing.
            res.score = 10.0;
            res.unescaped_line = "<!ENTITY";
            res.keyword_matches.push_back("xml_entity");
            m_transaction->addNote("sv: found xml_entity in " + res.location + "#" + res.param_name);
            m_transaction->reportScanResult(res);
            return 0;
        }
    }

    // Scan parameter name
    // The name is scanned only if at least one of the following three conditions holds:
    //  - badUrlEncoding: url parameter at key depth 2 whose key changes when unescaped, while the key or the
    //    value fails the url-encoding check
    bool badUrlEncoding =
        dp.m_key.depth() == 2 &&
        isUrlParamPayload && key != unescape(key) &&
        (!checkUrlEncoded(k, k_len) || !checkUrlEncoded(v, v_len));
    //  - scanNameDueToSplitUrl: part of a split url at key depth 2, other than "url.id"
    bool scanNameDueToSplitUrl = dp.m_key.depth() == 2 && isSplitUrl && key != "url.id";
    //  - suspiciousName: top-level cookie/referer_param/url_param/body key that does not match the
    //    good_header_name_re signature
    bool suspiciousName = dp.depth() == 0 &&
        (isCookiePayload || isRefererParamPayload || isUrlParamPayload || isBodyPayload) &&
        (!m_transaction->getAssetState()->getSignatures()-> good_header_name_re.hasMatch(key));

    dbgTrace(D_WAAP_SCANNER)
        << "badUrlEncoding="
        << badUrlEncoding
        << ", scanNameDueToSplitUrl="
        << scanNameDueToSplitUrl
        << ", suspiciousName"
        << suspiciousName;

    if (badUrlEncoding || scanNameDueToSplitUrl || suspiciousName) {
        dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: candidate to scan parameter names";

        // Deep-scan parameter names
        if (m_transaction->getAssetState()->apply(key, res, dp.m_key.first())) {
            if (suspiciousHit(res, dp, dp.m_key.first(), dp.m_key.str(), key)) {
                // Scanner found enough evidence to report this res
                dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: SUSPICIOUS PARAM NAME: k='" <<
                    key << "' v='" << value << "'";
#ifdef ENABLE_WAAP_ATTACK_IN_PARAM
                // Mark both the local result and the transaction's scan result as an attack in a parameter name
                res.param_name = ATTACK_IN_PARAM;
                if (m_transaction->getScanResultPtr()) {
                    m_transaction->getScanResultPtr()->m_isAttackInParam = true;
                    m_transaction->getScanResultPtr()->param_name = ATTACK_IN_PARAM;
                }
                else {
                    dbgWarning(D_WAAP_SCANNER) << "Uninitialized m_scanResult during scanning parameter name (!!!)";
                }
#endif
                m_transaction->addNote("sn:" + res.location + (res.param_name.empty() ? "" : "/" + res.param_name));
            }
        }
    }
    // Keep a copy of the parameter name scan (it may be merged into the value scan below),
    // and start the value scan from a clean result
    Waf2ScanResult param_name_res = res;
    res.clear();

    // Scan parameter value
    if (m_transaction->getAssetState()->apply(value, res, dp.m_key.first(), dp.isBinaryData(),
        dp.getSplitType()))
    {
        // For url parameters, keywords matched in both the name and the value are scored together
        if (!param_name_res.keyword_matches.empty() && !res.keyword_matches.empty() &&
            param_name_res.location == "url_param")
        {
            dbgTrace(D_WAAP_SCANNER) << "Found suspicios content in param name and value. Merging scans";
            res.mergeFrom(param_name_res);
        }

        if (suspiciousHit(res, dp, dp.m_key.first(), dp.m_key.str(), key)) {
            // Scanner found enough evidence to report this res
            dbgTrace(D_WAAP_SCANNER) << "Waap::Scanner::onKv: SUSPICIOUS VALUE: k='" << key <<
                "' v='" << value << "'";
            m_transaction->addNote("sv:" + res.location + (res.param_name.empty() ? "" : "/" + res.param_name));
        }
    }

    return 0;
}
